{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "AI MOOC，免费人工智能学习平台：www.ai-xlab.com\n",
    "![](https://raw.githubusercontent.com/Qinbf/tf-model-zoo/master/README_IMG/01.jpg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/qin/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    }
   ],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import math\n",
    "import os\n",
    "\n",
    "import tensorflow as tf\n",
    "from im2txt import configuration\n",
    "from im2txt import inference_wrapper\n",
    "from im2txt.inference_utils import caption_generator\n",
    "from im2txt.inference_utils import vocabulary\n",
    "import matplotlib.pyplot as plt\n",
    "from PIL import Image\n",
    "\n",
    "import numpy as np\n",
    "import cv2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 训练好的模型存放路径\n",
    "checkpoint_path = \"./model/model.ckpt-3000000\"\n",
    "# 词汇表\n",
    "vocab_file = \"./im2txt/data/word_counts.txt\"\n",
    "# 视频路径\n",
    "input_files = \"./video/1.mp4\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Building model.\n",
      "INFO:tensorflow:Initializing vocabulary from file: ./im2txt/data/word_counts.txt\n",
      "INFO:tensorflow:Created vocabulary with 11520 words\n",
      "INFO:tensorflow:Loading model from checkpoint: ./model/model.ckpt-3000000\n",
      "INFO:tensorflow:Restoring parameters from ./model/model.ckpt-3000000\n",
      "INFO:tensorflow:Successfully loaded checkpoint: model.ckpt-3000000\n"
     ]
    }
   ],
   "source": [
    "# 载入训练好的模型\n",
    "g = tf.Graph()\n",
    "with g.as_default():\n",
    "    model = inference_wrapper.InferenceWrapper()\n",
    "    restore_fn = model.build_graph_from_config(configuration.ModelConfig(), checkpoint_path)\n",
    "\n",
    "# 载入词表\n",
    "vocab = vocabulary.Vocabulary(vocab_file)\n",
    "\n",
    "with tf.Session(graph=g) as sess:\n",
    "    # 载入训练好的模型\n",
    "    restore_fn(sess)\n",
    "    generator = caption_generator.CaptionGenerator(model, vocab)\n",
    "    \n",
    "    # 从文件读取视频内容\n",
    "    cap = cv2.VideoCapture(input_files)\n",
    "    # 视频每秒传输帧数\n",
    "    fps = cap.get(cv2.CAP_PROP_FPS)\n",
    "    # 视频图像的宽度\n",
    "    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "    # 视频图像的长度\n",
    "    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "\n",
    "    fourcc = cv2.VideoWriter_fourcc('m','p','4','v')\n",
    "    out = cv2.VideoWriter('video/output.avi',fourcc,fps,(frame_width,frame_height))\n",
    "    n = 0\n",
    "    sentence = ''\n",
    "    while(True):\n",
    "        # ret 读取成功True或失败False\n",
    "        # frame读取到的图像的内容\n",
    "        # 读取一帧数据\n",
    "        ret,frame = cap.read()\n",
    "        if ret!=True:\n",
    "            break\n",
    "        n += 1\n",
    "        # 每秒生成一个新的描述\n",
    "        if n == fps:\n",
    "            n = 0\n",
    "            cv2.imwrite('video/temp.jpg', frame)\n",
    "            # 载入图片\n",
    "            image = tf.gfile.FastGFile('video/temp.jpg', 'rb').read()   \n",
    "            # 获得图片描述\n",
    "            captions = generator.beam_search(sess, image)\n",
    "            # 获得图片描述\n",
    "            sentence = [vocab.id_to_word(w) for w in captions[0].sentence[1:-1]]\n",
    "            sentence = \" \".join(sentence)\n",
    "            \n",
    "        # cv2.putText(图像, 文字, (x, y), 字体, 大小, (b, g, r), 宽度)\n",
    "        frame = cv2.putText(frame, sentence, (50, frame_height-50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)\n",
    "        # 写入frame\n",
    "        out.write(frame)\n",
    "    \n",
    "cap.release()\n",
    "cv2.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
